import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the Beijing air-quality workbook and preview the first rows.
data = pd.read_excel("北京市空气质量数据.xlsx")
print(data.head())

# Handle missing values: show the rows whose SO2 reading is missing, replace
# every NaN in the frame with 0, then print the same selection again so the
# result of the replacement can be checked by eye.
# NOTE(review): zero-filling treats a missing reading as a real 0, which
# lowers the means plotted later -- confirm this is the intended policy.
so2_missing = data['SO2'].isna()
print(data.loc[so2_missing])
data = data.replace(np.nan, 0)
print(data[data['SO2'].isna()])

# Plotting setup: select the SimHei font so the Chinese labels can be
# rendered, and disable the Unicode minus sign for axis tick labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# AQI trend: the daily AQI line, a solid red line at the overall mean and one
# dashed line per year at that year's mean.
aqi = data['AQI']
overall_mean = aqi.mean()
plt.plot(aqi, linestyle='-', linewidth=0.5)
plt.axhline(y=overall_mean, color='red', linestyle='-', linewidth=0.5, label="AQI总平均值")

# The year column and the `year` label list are reused by the later figures.
data['年'] = data['日期'].dt.year
year = ['2014年', '2015年', '2016年']
col = ['blue', 'green', 'yellow']

# Compute the per-year means once instead of once per loop iteration.
# groupby sorts by year, so the means line up with the labels above; zip stops
# at the shortest input, so data with fewer than three years no longer raises
# IndexError.
yearly_mean = aqi.groupby(data['年']).mean()
for mean_value, line_color, year_label in zip(yearly_mean.values, col, year):
    plt.axhline(y=mean_value, color=line_color, linestyle='--', linewidth=0.5, label=year_label)

plt.title("2014-2016年折线图")
plt.xlabel('年份')
plt.ylabel('AQI')
plt.xticks([1, 365, 365 * 2], year)
# The y axis carries a single tick, placed at the overall mean.
plt.yticks([overall_mean], ['AQI平均值'])
plt.legend()

# Annotate the worst day. idxmax() gives the index label of the first maximum,
# which is the x coordinate the line above is drawn against, without building
# a Python list of every value.
worst_day = aqi.idxmax()
plt.text(x=worst_day, y=aqi.max() - 20, s="空气质量最差日", color='red')
plt.show()

# 2x2 overview figure: yearly mean AQI, AQI histogram, PM2.5 vs AQI scatter,
# and the share of each air-quality grade.

# Top-left: mean AQI per year, labelled with the `year` list defined earlier.
plt.subplot(2, 2, 1)
plt.plot(data['AQI'].groupby(data['年']).mean().values)
plt.title('AQI均值折线图')
plt.xticks([0, 1, 2], year)

# Top-right: distribution of daily AQI values.
plt.subplot(2, 2, 2)
plt.hist(data['AQI'], bins=20)
plt.title('AQI直方图')

# Bottom-left: PM2.5 against AQI.
plt.subplot(2, 2, 3)
plt.scatter(data['PM2.5'], data['AQI'])
plt.title('PM2.5和AQI相关性')
plt.xlabel("PM2.5")
plt.ylabel("AQI")

# Bottom-right: proportion of days per quality grade.
plt.subplot(2, 2, 4)
# Series.value_counts replaces the top-level pd.value_counts, which is
# deprecated in recent pandas releases; sort=False is kept as in the original.
zhiliang = data['质量等级'].value_counts(sort=False)
plt.pie(zhiliang, labels=zhiliang.index, autopct='%3.1f%%')
plt.title("空气质量比例")

# Re-space the panels so titles and axis labels do not collide.
plt.tight_layout()
plt.show()

# 2x3 grid of scatter plots, one per pollutant column, each against AQI.
# Listed in row-major panel order: the first three fill the top row, the
# last three the bottom row.
pollutants = ['PM2.5', 'PM10', 'SO2', 'CO', 'NO2', 'O3']

fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(10, 5))
for ax, pollutant in zip(axes.flat, pollutants):
    ax.scatter(data[pollutant], data['AQI'])
    # The title is built from the column name, so every panel is labelled
    # with the pollutant it actually shows (the PM10 panel used to be titled
    # as PM2.5).
    ax.set_title(pollutant + '和AQI相关性')
    ax.set_xlabel(pollutant)
    ax.set_ylabel("AQI")

# Re-space the panels so the bottom-row titles do not run into the top-row
# x-axis labels.
fig.tight_layout()
plt.show()